4  Appendix A

4.1 Setup

4.1.1 Install Packages

We install the following packages using the groundhog package manager to increase computational reproducibility.

options(repos = c(CRAN = "https://cran.r-project.org")) 

if (!requireNamespace("groundhog", quietly = TRUE)) {
  install.packages("groundhog")
}

pkgs <- c("magrittr", "data.table", "stringr", "Rmisc", "gt")

groundhog::groundhog.library(pkg = pkgs,
                             date = "2024-08-01")

rm(pkgs)

4.1.2 Read Data

data <- readRDS(file="../data/processed/full.Rda")
vars <- str_subset(string = names(data), pattern = "^.{1,4}$", negate = TRUE)
covariates <- data[stage == 1, ..vars]

4.2 Table A.1

Using the {gt} package, we produce Table A.1 in the steps. First, we create a table containing all the covariates that are treated dummies or continuous variables (see Table 4.1). In a second step, we create Table 4.2 as a separate table.

long_df <- melt(covariates, 
                id.vars = c("surprise", "communication"), 
                measure.vars = c("age_18_34", "age_35_52", "age_53_plus", "female", "high_education", "high_income", "married", "parentship", "high_temperature", "high_usage", "high_general_risk", "high_weather_risk", "high_accuracy", "high_credibility", "temperature", "usage", "general_risk", "weather_risk", "accuracy", "credibility"), 
                variable.name = "Variable", 
                value.name = "Value")

pooled_summary <- long_df[, 
                          .(N = sum(!is.na(Value)), 
                            Mean = mean(Value, na.rm = TRUE)), 
                          by = .(Variable)]

summary_tmp <- long_df[, 
                       .(N = sum(!is.na(Value)), 
                         Mean = mean(Value, na.rm = TRUE)), 
                       by = .(surprise, communication, Variable)]

summary_table <- merge(pooled_summary, 
                       summary_tmp, 
                       by = "Variable", 
                       suffixes = c("_pooled", ""))

summary_table_wide <- dcast(summary_table, 
                            Variable + N_pooled + Mean_pooled ~ surprise + communication, 
                            value.var = c("N", "Mean"))

setcolorder(summary_table_wide, c("Variable", "N_pooled", "Mean_pooled", 
                                  "N_FALSE_point", "Mean_FALSE_point", 
                                  "N_FALSE_both", "Mean_FALSE_both", 
                                  "N_FALSE_interval", "Mean_FALSE_interval", 
                                  "N_TRUE_point", "Mean_TRUE_point", 
                                  "N_TRUE_both", "Mean_TRUE_both", 
                                  "N_TRUE_interval", "Mean_TRUE_interval"))

summary_table_wide %>% 
  gt() %>% 
  cols_label(
    Variable = "Treatment Variable",
    N_pooled = "N", Mean_pooled = "Mean",
    N_FALSE_point = "N", Mean_FALSE_point = "Mean",
    N_FALSE_both = "N", Mean_FALSE_both = "Mean",
    N_FALSE_interval = "N", Mean_FALSE_interval = "Mean",
    N_TRUE_point = "N", Mean_TRUE_point = "Mean",
    N_TRUE_both = "N", Mean_TRUE_both = "Mean",
    N_TRUE_interval = "N", Mean_TRUE_interval = "Mean"
  ) %>%
  tab_spanner(
    label = "Pooled",
    columns = c(N_pooled, Mean_pooled)
  ) %>%
  tab_spanner(
    label = "Confirmation",
    columns = c(N_FALSE_point, Mean_FALSE_point, N_FALSE_both, Mean_FALSE_both, N_FALSE_interval, Mean_FALSE_interval)
  ) %>%
  tab_spanner(
    label = "Contradiction",
    columns = c(N_TRUE_point, Mean_TRUE_point, N_TRUE_both, Mean_TRUE_both, N_TRUE_interval, Mean_TRUE_interval)
  ) %>%
  tab_spanner(
    label = "Point",
    id = "conf_point",
    columns = c(N_FALSE_point, Mean_FALSE_point),
    level = 2
  ) %>%
  tab_spanner(
    label = "Both",
    id = "conf_both",
    columns = c(N_FALSE_both, Mean_FALSE_both),
    level = 2
  ) %>%
  tab_spanner(
    label = "Interval",
    id = "conf_interval",
    columns = c(N_FALSE_interval, Mean_FALSE_interval),
    level = 2
  ) %>%
  tab_spanner(
    label = "Point",
    id = "contra_point",
    columns = c(N_TRUE_point, Mean_TRUE_point),
    level = 2
  ) %>%
  tab_spanner(
    label = "Both",
    id = "contra_both",
    columns = c(N_TRUE_both, Mean_TRUE_both),
    level = 2
  ) %>%
  tab_spanner(
    label = "Interval",
    id = "contra_interval",
    columns = c(N_TRUE_interval, Mean_TRUE_interval),
    level = 2
  ) %>%
  fmt_number(
    columns = starts_with("Mean"),
    decimals = 3
  ) %>%
  fmt_number(
    columns = starts_with("N"),
    decimals = 0
  ) %>%
  cols_align(
    align = "left",
    columns = c(Variable)
  )
Table 4.1: Descriptive statistics (control variables): Mean values per treatment
Point Both Interval Point Both Interval
Treatment Variable Pooled Confirmation Contradiction
N Mean N Mean N Mean N Mean N Mean N Mean N Mean
age_18_34 1,503 0.335 255 0.325 247 0.296 243 0.309 251 0.386 249 0.345 258 0.345
age_35_52 1,503 0.306 255 0.310 247 0.300 243 0.313 251 0.299 249 0.313 258 0.302
age_53_plus 1,503 0.359 255 0.365 247 0.405 243 0.379 251 0.315 249 0.341 258 0.353
female 1,503 0.490 255 0.463 246 0.480 243 0.444 251 0.542 250 0.528 258 0.484
high_education 1,505 0.618 255 0.616 247 0.575 243 0.687 252 0.643 250 0.612 258 0.578
high_income 1,363 0.525 230 0.552 227 0.498 220 0.564 225 0.547 227 0.471 234 0.517
married 1,505 0.396 255 0.427 247 0.421 243 0.379 252 0.389 250 0.356 258 0.403
parentship 1,505 0.429 255 0.400 247 0.482 243 0.395 252 0.440 250 0.404 258 0.450
high_temperature 1,505 0.591 255 0.557 247 0.543 243 0.613 252 0.575 250 0.668 258 0.589
high_usage 1,505 0.511 255 0.518 247 0.526 243 0.494 252 0.548 250 0.516 258 0.465
high_general_risk 1,505 0.540 255 0.561 247 0.514 243 0.506 252 0.512 250 0.552 258 0.589
high_weather_risk 1,505 0.633 255 0.600 247 0.587 243 0.588 252 0.679 250 0.660 258 0.678
high_accuracy 1,505 0.786 255 0.800 247 0.810 243 0.819 252 0.698 250 0.836 258 0.756
high_credibility 1,505 0.503 255 0.490 247 0.615 243 0.560 252 0.369 250 0.504 258 0.484
temperature 1,505 20.051 255 19.906 247 19.526 243 20.267 252 20.079 250 20.468 258 20.062
usage 1,505 3.347 255 3.408 247 3.348 243 3.350 252 3.393 250 3.308 258 3.275
general_risk 1,505 4.193 255 4.290 247 3.988 243 4.012 252 4.048 250 4.396 258 4.407
weather_risk 1,505 5.260 255 5.122 247 4.935 243 4.967 252 5.433 250 5.496 258 5.585
accuracy 1,505 2.241 255 2.306 247 2.405 243 2.366 252 1.992 250 2.308 258 2.081
credibility 1,505 2.378 255 2.380 247 2.559 243 2.498 252 2.056 250 2.404 258 2.380
# Calculate total observations for each treatment arm
total_obs <- covariates[, .(n = .N), by = .(surprise, communication)]

# Create a summary of the comprehension variable
comprehension_summary <- covariates[, .(n = .N), by = .(surprise, communication, comprehension)]
comprehension_summary[, percentage := n / sum(n) * 100, by = .(surprise, communication)]

# Reshape the data to wide format
comprehension_wide <- dcast(comprehension_summary, 
                            comprehension ~ surprise + communication, 
                            value.var = c("n", "percentage"))

# Calculate pooled values
comprehension_wide[, n_pooled := rowSums(.SD), .SDcols = patterns("^n_")]
comprehension_wide[, percentage_pooled := n_pooled / sum(n_pooled) * 100]

# Create a row for total observations
total_row <- data.table(
  comprehension = "Total",
  n_pooled = sum(total_obs$n),
  percentage_pooled = NA_real_,  # Set to NA to hide the percentage
  n_FALSE_point = total_obs[surprise == FALSE & communication == "point", n],
  percentage_FALSE_point = NA_real_,  # Set to NA to hide the percentage
  n_FALSE_both = total_obs[surprise == FALSE & communication == "both", n],
  percentage_FALSE_both = NA_real_,  # Set to NA to hide the percentage
  n_FALSE_interval = total_obs[surprise == FALSE & communication == "interval", n],
  percentage_FALSE_interval = NA_real_,  # Set to NA to hide the percentage
  n_TRUE_point = total_obs[surprise == TRUE & communication == "point", n],
  percentage_TRUE_point = NA_real_,  # Set to NA to hide the percentage
  n_TRUE_both = total_obs[surprise == TRUE & communication == "both", n],
  percentage_TRUE_both = NA_real_,  # Set to NA to hide the percentage
  n_TRUE_interval = total_obs[surprise == TRUE & communication == "interval", n],
  percentage_TRUE_interval = NA_real_  # Set to NA to hide the percentage
)

# Combine total row with comprehension data
comprehension_wide <- rbindlist(list(total_row, comprehension_wide), fill = TRUE)

# Create the table
gt_table <- comprehension_wide %>%
  gt() %>%
  cols_label(
    comprehension = "comprehension",
    n_pooled = "N", percentage_pooled = "%",
    n_FALSE_point = "N", percentage_FALSE_point = "%",
    n_FALSE_both = "N", percentage_FALSE_both = "%",
    n_FALSE_interval = "N", percentage_FALSE_interval = "%",
    n_TRUE_point = "N", percentage_TRUE_point = "%",
    n_TRUE_both = "N", percentage_TRUE_both = "%",
    n_TRUE_interval = "N", percentage_TRUE_interval = "%"
  ) %>%
  tab_spanner(
    label = "Pooled",
    columns = c(n_pooled, percentage_pooled)
  ) %>%
  tab_spanner(
    label = "Confirmation",
    columns = c(n_FALSE_point, percentage_FALSE_point, n_FALSE_both, percentage_FALSE_both, n_FALSE_interval, percentage_FALSE_interval),
    level = 1
  ) %>%
  tab_spanner(
    label = "Contradiction",
    columns = c(n_TRUE_point, percentage_TRUE_point, n_TRUE_both, percentage_TRUE_both, n_TRUE_interval, percentage_TRUE_interval),
    level = 1
  ) %>%
  tab_spanner(
    label = "Point",
    id = "conf_point",
    columns = c(n_FALSE_point, percentage_FALSE_point),
    level = 2
  ) %>%
  tab_spanner(
    label = "Both",
    id = "conf_both",
    columns = c(n_FALSE_both, percentage_FALSE_both),
    level = 2
  ) %>%
  tab_spanner(
    label = "Interval",
    id = "conf_interval",
    columns = c(n_FALSE_interval, percentage_FALSE_interval),
    level = 2
  ) %>%
  tab_spanner(
    label = "Point",
    id = "contra_point",
    columns = c(n_TRUE_point, percentage_TRUE_point),
    level = 2
  ) %>%
  tab_spanner(
    label = "Both",
    id = "contra_both",
    columns = c(n_TRUE_both, percentage_TRUE_both),
    level = 2
  ) %>%
  tab_spanner(
    label = "Interval",
    id = "contra_interval",
    columns = c(n_TRUE_interval, percentage_TRUE_interval),
    level = 2
  ) %>%
  fmt_number(
    columns = contains("percentage"),
    decimals = 1,
    suffix = "%"
  ) %>%
  fmt_number(
    columns = starts_with("n"),
    decimals = 0
  ) %>%
  cols_align(
    align = "left",
    columns = c(comprehension)
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = 1
    )
  ) %>%
  fmt_missing(columns = everything(), missing_text = "") 

gt_table
Table 4.2: Descriptive statistics (control variables): Mean values per treatment
Point Both Interval Point Both Interval
comprehension Pooled Confirmation Contradiction
N % N % N % N % N % N % N %
Total 1,505
255
247
243
252
250
258
no 32 2.1 5 2.0 5 2.0 8 3.3 3 1.2 5 2.0 6 2.3
rather not 254 16.9 44 17.3 40 16.2 38 15.6 50 19.8 43 17.2 39 15.1
rather yes 739 49.1 132 51.8 114 46.2 125 51.4 112 44.4 131 52.4 125 48.4
yes 480 31.9 74 29.0 88 35.6 72 29.6 87 34.5 71 28.4 88 34.1

Session Info

sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: x86_64-apple-darwin20
Running under: macOS Sonoma 14.4.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Zurich
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] gt_0.11.0         Rmisc_1.5.1       plyr_1.8.9        lattice_0.22-6   
[5] stringr_1.5.1     data.table_1.15.4 magrittr_2.0.3   

loaded via a namespace (and not attached):
 [1] vctrs_0.6.5       cli_3.6.3         knitr_1.48        rlang_1.1.4      
 [5] xfun_0.46         stringi_1.8.4     generics_0.1.3    jsonlite_1.8.8   
 [9] glue_1.7.0        htmltools_0.5.8.1 sass_0.4.9        fansi_1.0.6      
[13] rmarkdown_2.27    grid_4.4.1        tibble_3.2.1      evaluate_0.24.0  
[17] fastmap_1.2.0     yaml_2.3.10       lifecycle_1.0.4   compiler_4.4.1   
[21] dplyr_1.1.4       pkgconfig_2.0.3   htmlwidgets_1.6.4 Rcpp_1.0.13      
[25] rstudioapi_0.16.0 digest_0.6.36     R6_2.5.1          groundhog_3.2.0  
[29] tidyselect_1.2.1  utf8_1.2.4        pillar_1.9.0      parallel_4.4.1   
[33] withr_3.0.1       tools_4.4.1       xml2_1.3.6